package com.fortune.conf;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

import java.util.List;

/**
 * WebMagic {@link PageProcessor} for a single NetEase (163.com) article page.
 *
 * <p>For each processed page it extracts the HTML of the article body, replaces every
 * image source found inside that body with a fixed placeholder image URL, and prints the
 * resulting HTML to standard output. Nothing is stored on the {@link Page} result.
 */
public class WANGYIPageNewsSpider implements PageProcessor {

    /** XPath of the element treated as the article body. */
    private static final String CONTENT_XPATH = "//*[@id=\"content\"]/div[2]";

    /** Image URL substituted for every image source found in the article body. */
    private static final String PLACEHOLDER_IMAGE_URL =
            "https://images.xlcwx.com/xlcwx/m/community/2025/3/29/729d998416574141be96d9baccfcdd50.gif";

    /** Inline {@code onload} handler that is stripped from the extracted HTML. */
    private static final String IMG_ONLOAD_ATTRIBUTE =
            "onload=\"this.removeAttribute('width'); this.removeAttribute('height'); this.removeAttribute('onload');\"";

    // Crawl configuration for the target site: user agent, retry count, sleep time
    // between requests and a Referer header.
    // NOTE(review): the user-agent literal ends in "..." and looks like a truncated
    // paste -- confirm and replace with a complete user-agent string.
    private final Site site = Site.me()
            .setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) ...")
            .setRetryTimes(3)
            .setSleepTime(5000) // longer sleep time between requests
            .addHeader("Referer", "https://www.163.com/");

    /**
     * Returns the crawl configuration used by this processor.
     *
     * @return the site configuration built for this spider
     */
    public Site getSite() {
        return site;
    }

    /**
     * Extracts the article body, swaps its image sources for the placeholder image and
     * prints the resulting HTML.
     *
     * <p>If the page has no element matching {@link #CONTENT_XPATH}, a message is written
     * to standard error and the page is left untouched instead of failing with a
     * {@link NullPointerException}.
     *
     * @param page the downloaded page to process
     */
    public void process(Page page) {
        String contentHtml = page.getHtml().xpath(CONTENT_XPATH).toString();
        if (contentHtml == null) {
            // The selector yields null when nothing matches (layout change, error page, ...).
            System.err.println("No article content matched " + CONTENT_XPATH + " at " + page.getUrl());
            return;
        }

        List<Selectable> imgNodes = page.getHtml().xpath(CONTENT_XPATH + "//img").nodes();
        if (!imgNodes.isEmpty()) {
            // Done once up front: the serialized HTML escapes '&' as "&amp;", which would
            // otherwise stop an extracted src value from matching its occurrence in the HTML.
            contentHtml = normalizeMarkup(contentHtml);
            for (Selectable img : imgNodes) {
                // Extract the original src attribute.
                String originalSrc = img.xpath("//img/@src").get();
                if (originalSrc == null || originalSrc.isEmpty()) {
                    // A null target makes String.replace throw; an empty target would insert
                    // the placeholder between every character of the HTML.
                    continue;
                }
                System.out.println(originalSrc);
                contentHtml = contentHtml.replace(originalSrc, PLACEHOLDER_IMAGE_URL);
            }
        }

        System.out.println("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
        System.out.println(contentHtml);
    }

    /** Unescapes {@code &amp;} and strips the inline image onload handler from the HTML. */
    private static String normalizeMarkup(String html) {
        return html.replace("&amp;", "&").replace(IMG_ONLOAD_ATTRIBUTE, "");
    }

    /**
     * Runs the spider against one hard-coded article URL.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = "https://www.163.com/dy/article/JSS55TF205568181.html";
        Spider.create(new WANGYIPageNewsSpider())
                .addUrl(url)
                .thread(3)
                .run();
    }
}
